In [1]:
#Importing packages
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
In [2]:
df =pd.read_csv('SUPPLY DATA_Merged.csv')
df.head()
Out[2]:
Product type SKU Price Availability Number of products sold Revenue generated Customer demographics Stock levels Lead times Order quantities ... Inspection results Defect rates Transportation modes Routes Costs Scheduled Delivery Date Delivered to Client Date Delivery Recorded Date Product Group Brand
0 haircare SKU0 69.808006 55 802 8661.996792 Non-binary 58 7 96 ... Pending 0.226410 Road Route B 187.752075 2-Jun-06 2-Jun-06 2-Jun-06 HRDT Loreal
1 skincare SKU1 14.843523 95 736 7460.900065 Female 53 30 37 ... Pending 4.854068 Road Route B 503.065579 14-Nov-06 14-Nov-06 14-Nov-06 ARV Cetaphil
2 haircare SKU2 11.319683 34 8 9577.749626 Unknown 1 10 88 ... Pending 4.580593 Air Route C 141.920282 27-Aug-06 27-Aug-06 27-Aug-06 HRDT Tresemme
3 skincare SKU3 61.163343 68 83 7766.836426 Non-binary 23 13 59 ... Fail 4.746649 Rail Route A 254.776159 1-Sep-06 1-Sep-06 1-Sep-06 ARV Nivea
4 skincare SKU4 4.805496 26 871 2686.505152 Non-binary 5 3 56 ... Fail 3.145580 Air Route A 923.440632 11-Aug-06 11-Aug-06 11-Aug-06 ARV Clinique

5 rows × 29 columns

In [3]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 29 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Product type              100 non-null    object 
 1   SKU                       100 non-null    object 
 2   Price                     100 non-null    float64
 3   Availability              100 non-null    int64  
 4   Number of products sold   100 non-null    int64  
 5   Revenue generated         100 non-null    float64
 6   Customer demographics     100 non-null    object 
 7   Stock levels              100 non-null    int64  
 8   Lead times                100 non-null    int64  
 9   Order quantities          100 non-null    int64  
 10  Shipping times            100 non-null    int64  
 11  Shipping carriers         100 non-null    object 
 12  Shipping costs            100 non-null    float64
 13  Supplier name             100 non-null    object 
 14  Location                  100 non-null    object 
 15  Lead time                 100 non-null    int64  
 16  Production volumes        100 non-null    int64  
 17  Manufacturing lead time   100 non-null    int64  
 18  Manufacturing costs       100 non-null    float64
 19  Inspection results        100 non-null    object 
 20  Defect rates              100 non-null    float64
 21  Transportation modes      100 non-null    object 
 22  Routes                    100 non-null    object 
 23  Costs                     100 non-null    float64
 24  Scheduled Delivery Date   100 non-null    object 
 25  Delivered to Client Date  100 non-null    object 
 26  Delivery Recorded Date    100 non-null    object 
 27  Product Group             100 non-null    object 
 28  Brand                     100 non-null    object 
dtypes: float64(6), int64(9), object(14)
memory usage: 22.8+ KB
In [4]:
print('No. of rows and columns in dataset',df.shape)
No. of rows and columns in dataset (100, 29)
In [5]:
#To check the statistics about the dataset
df.describe().style.background_gradient(cmap='autumn_r')
Out[5]:
  Price Availability Number of products sold Revenue generated Stock levels Lead times Order quantities Shipping times Shipping costs Lead time Production volumes Manufacturing lead time Manufacturing costs Defect rates Costs
count 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000
mean 49.462461 48.400000 460.990000 5776.048187 47.770000 15.960000 49.220000 5.750000 5.548149 17.080000 567.840000 14.770000 47.266693 2.277158 529.245782
std 31.168193 30.743317 303.780074 2732.841744 31.369372 8.785801 26.784429 2.724283 2.651376 8.846251 263.046861 8.912430 28.982841 1.461366 258.301696
min 1.699976 1.000000 8.000000 1061.618523 0.000000 1.000000 1.000000 1.000000 1.013487 1.000000 104.000000 1.000000 1.085069 0.018608 103.916248
25% 19.597823 22.750000 184.250000 2812.847151 16.750000 8.000000 26.000000 3.750000 3.540248 10.000000 352.000000 7.000000 22.983299 1.009650 318.778455
50% 51.239830 43.500000 392.500000 6006.352023 47.500000 17.000000 52.000000 6.000000 5.320534 18.000000 568.500000 14.000000 45.905622 2.141863 520.430444
75% 77.198228 75.000000 704.250000 8253.976920 73.000000 24.000000 71.250000 8.000000 7.601695 25.000000 797.000000 23.000000 68.621026 3.563995 763.078231
max 99.171329 100.000000 996.000000 9866.465458 100.000000 30.000000 96.000000 10.000000 9.929816 30.000000 985.000000 30.000000 99.466109 4.939255 997.413450
In [6]:
#To find some correlation
plt.figure(figsize=(16,7))
sns.heatmap(df.corr(),annot=True,cmap='autumn_r')
plt.show()
In [7]:
#To check any null values in the dataset
df.isna().sum()/len(df)
Out[7]:
Product type                0.0
SKU                         0.0
Price                       0.0
Availability                0.0
Number of products sold     0.0
Revenue generated           0.0
Customer demographics       0.0
Stock levels                0.0
Lead times                  0.0
Order quantities            0.0
Shipping times              0.0
Shipping carriers           0.0
Shipping costs              0.0
Supplier name               0.0
Location                    0.0
Lead time                   0.0
Production volumes          0.0
Manufacturing lead time     0.0
Manufacturing costs         0.0
Inspection results          0.0
Defect rates                0.0
Transportation modes        0.0
Routes                      0.0
Costs                       0.0
Scheduled Delivery Date     0.0
Delivered to Client Date    0.0
Delivery Recorded Date      0.0
Product Group               0.0
Brand                       0.0
dtype: float64
In [8]:
#Separating numerical and the categorical columns
categorical=[x for x in df.columns if df[x].dtypes=='object']
numerical=[x for x in df.columns if df[x].dtypes!='object']
print(categorical)
print(numerical)
['Product type', 'SKU', 'Customer demographics', 'Shipping carriers', 'Supplier name', 'Location', 'Inspection results', 'Transportation modes', 'Routes', 'Scheduled Delivery Date', 'Delivered to Client Date', 'Delivery Recorded Date', 'Product Group', 'Brand']
['Price', 'Availability', 'Number of products sold', 'Revenue generated', 'Stock levels', 'Lead times', 'Order quantities', 'Shipping times', 'Shipping costs', 'Lead time', 'Production volumes', 'Manufacturing lead time', 'Manufacturing costs', 'Defect rates', 'Costs']

EDA

In [9]:
#Using count plots, visualizing the categorical columns 
for x in df.select_dtypes(include='object'):
    sns.countplot(data=df,x=df[x])
    plt.xticks(rotation=85)
    plt.show()
In [10]:
#Using Boxplot, visualizing the numerical columns 
plt.figure(figsize=(14,6))
df.boxplot(grid=False, rot=50, fontsize=14)
Out[10]:
<AxesSubplot:>
In [11]:
#Bar chart for each product type with Revenue generated
ax = df.groupby(['Product type'])[['Revenue generated']].sum()\
    .plot(kind='bar', figsize=(15, 7), title="Revenue generated by Product type", fontsize=18, color=['green'])

ax.set_xlabel("Product Type", fontsize=14)
ax.set_ylabel("Revenue generated", fontsize=14)

# Add data labels
for p in ax.patches:
    ax.annotate(format(p.get_height(), '.2f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
                ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

plt.show()
In [12]:
#Bar chart for each product type with Order quantities
ax = df.groupby(['Product type'])[['Order quantities']].sum()\
    .plot(kind='bar', figsize=(15, 7), title="Demand graph by Product type", fontsize=18, color=['orange'])

ax.set_xlabel("Product Type", fontsize=14)
ax.set_ylabel("Order quantities", fontsize=14)

# Add data labels
for p in ax.patches:
    ax.annotate(format(p.get_height(), '.2f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
                ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

plt.show()
In [13]:
#Bar chart for each product type with availability
ax = df.groupby(['Product type'])[['Availability']].sum()\
    .plot(kind='bar', figsize=(15, 7), title="Supply graph by Product type", fontsize=18, color=['blue'])

ax.set_xlabel("Product Type", fontsize=14)
ax.set_ylabel("Availability", fontsize=14)

# Add data labels
for p in ax.patches:
    ax.annotate(format(p.get_height(), '.2f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
                ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

plt.show()
In [14]:
# Scatter plot for demand and supply
plt.scatter(df['Order quantities'], df['Availability'])

# Calculate the best-fit line
fit = np.polyfit(df['Order quantities'], df['Availability'], 1)
line = np.poly1d(fit)

# Plot the best-fit line
plt.plot(df['Order quantities'], line(df['Order quantities']), color='red')

# Set labels and title
plt.xlabel('Order quantities')
plt.ylabel('Availability')
plt.title('Demand vs Supply')

# Display the plot
plt.show()

Observations

In [15]:
#Which brand generated more revenue by product
df.groupby(['Brand','Product type'])[['Revenue generated']].sum()\
.sort_index()\
.sort_values(by='Brand',ascending=False)\
.unstack()\
.style.background_gradient(cmap='rainbow')
Out[15]:
  Revenue generated
Product type cosmetics haircare skincare
Brand      
Cetaphil nan nan 89366.954339
Clinique nan nan 54138.670559
Dove nan 13097.824623 nan
Loreal nan 86006.445676 nan
Mac 41408.875079 nan nan
Maybelline 63995.436594 nan nan
Nivea nan nan 98122.537235
Revlon 56116.954328 nan nan
Tresemme nan 75351.120307 nan
In [16]:
#Categorizing each product by total price, available quantities
df.groupby(['Product type']).sum()\
.sort_index()\
.style.background_gradient(cmap='autumn_r')
Out[16]:
  Price Availability Number of products sold Revenue generated Stock levels Lead times Order quantities Shipping times Shipping costs Lead time Production volumes Manufacturing lead time Manufacturing costs Defect rates Costs
Product type                              
cosmetics 1491.387498 1332 11757 161521.266001 1525 400 1343 171 157.563663 352 12461 346 1119.371253 49.901461 13366.397283
haircare 1564.485482 1471 13611 174455.390606 1644 528 1480 191 200.863735 636 19957 580 1647.571776 84.427107 17328.862865
skincare 1890.373155 2037 20731 241628.162133 1608 668 2099 213 196.387510 720 24366 551 1959.726295 93.387231 22229.318068
In [17]:
#Categorizing each product by total price, available quantities
df.groupby(['Brand']).sum()\
.sort_index()\
.style.background_gradient(cmap='autumn_r')
Out[17]:
  Price Availability Number of products sold Revenue generated Stock levels Lead times Order quantities Shipping times Shipping costs Lead time Production volumes Manufacturing lead time Manufacturing costs Defect rates Costs
Brand                              
Cetaphil 679.180064 829 7064 89366.954339 531 259 814 75 74.715577 276 7835 214 688.558450 36.760952 8001.578017
Clinique 430.836306 489 5616 54138.670559 456 175 475 61 46.296781 190 6605 134 445.792529 20.603057 5114.036373
Dove 159.997358 206 1722 13097.824623 295 110 288 26 23.300682 61 1233 67 159.804923 11.050097 2269.530461
Loreal 814.539108 724 7420 86006.445676 753 183 730 85 88.893551 274 9527 275 828.304701 36.014933 7518.460083
Mac 338.603986 414 3246 41408.875079 320 140 473 53 42.887890 102 2763 80 354.598806 18.848161 4446.638071
Maybelline 656.803226 552 5559 63995.436594 737 112 477 74 77.802552 139 5856 142 350.769597 17.444996 5490.698963
Nivea 780.356785 719 8051 98122.537235 621 234 810 77 75.375152 254 9926 203 825.375316 36.023222 9113.703678
Revlon 495.980285 366 2952 56116.954328 468 148 393 44 36.873220 111 3842 124 414.002850 13.608304 3429.060249
Tresemme 589.949016 541 4469 75351.120307 596 235 462 80 88.669501 301 9197 238 659.462152 37.362076 7540.872320
In [18]:
# Grouping the data by 'Product type' and calculating the sum of 'Defect rates'
grouped_data = df.groupby(['Product type'])['Defect rates'].sum()

# Sorting the data in descending order
sorted_data = grouped_data.sort_values(ascending=False)

# Creating a pie chart
labels = ['skincare', 'haircare', 'cosmetics']
sorted_data.plot(kind='pie', labels=labels, autopct='%1.1f%%', title = "Defect rates by Product type", fontsize= 16, figsize=(15,7))

# Displaying the pie chart
plt.show()
In [19]:
import pandas as pd
import plotly.express as px

# Creating the pivot table
pivot_table = pd.pivot_table(df, values='Defect rates', index=['Transportation modes'], aggfunc='mean')

# Creating the pie chart
transportation_chart = px.pie(values=pivot_table["Defect rates"], names=pivot_table.index, 
                              title='Defect Rates by Transportation Mode', hole=0.5,
                              color_discrete_sequence=px.colors.qualitative.Pastel)

# Displaying the pie chart
transportation_chart.show()

Calculations

In [20]:
mean_customer_demand = df['Order quantities'].mean()
mean_available_production_time = df['Manufacturing lead time'].mean()
print("Customer_demand:", mean_customer_demand)
print("Available_production_time:", mean_available_production_time)
Customer_demand: 49.22
Available_production_time: 14.77

Calculation:1 TAKT TIME

In [21]:
# Calculating the takt time
Available_production_time = 14.77
Customer_demand = 49.22

takt_time = Available_production_time / Customer_demand
print("Takt Time:", takt_time, "minutes per unit")

#Calculating minimum workstations
import math

def calculate_workstations(takt_time, processing_time):
    return math.ceil(takt_time / processing_time)

# Example usage
takt_time = 3600  # Takt time in seconds
processing_time = 300  # Processing time per unit in seconds

workstations = calculate_workstations(takt_time, processing_time)
print("Minimum number of workstations required:", workstations)

def calculate_idle_time(total_production_time, actual_working_time):
    idle_time = total_production_time - actual_working_time
    percent_idle_time = (idle_time / total_production_time) * 100
    efficiency_delay = (idle_time / actual_working_time) * 100
    return idle_time, percent_idle_time, efficiency_delay

# Example usage
total_production_time = 480  # Total production time in minutes
actual_working_time = 380  # Actual working time in minutes

idle_time, percent_idle_time, efficiency_delay = calculate_idle_time(total_production_time, actual_working_time)

print("Idle Time:", idle_time, "minutes")
print("Percent Idle Time:", percent_idle_time, "%")
print("Efficiency Delay:", efficiency_delay, "%")
Takt Time: 0.3000812677773263 minutes per unit
Minimum number of workstations required: 12
Idle Time: 100 minutes
Percent Idle Time: 20.833333333333336 %
Efficiency Delay: 26.31578947368421 %

Calculation:2 QUEUE THEORY

In [22]:
#Calculating Queue Theory 
mean_arrival_rate = df['Shipping times'].mean()
mean_service_rate = df['Lead times'].mean()
print("Mean_product_arrival_time:", mean_arrival_rate)
print("Mean_product_service_rate:", mean_service_rate)

Average_number_of_customers_in_system = mean_arrival_rate/ (mean_service_rate-mean_arrival_rate)
print("L(Average_number_of_customers_in_system):", Average_number_of_customers_in_system)
Average_time_a_customer_spends_in_system = 1/(mean_service_rate-mean_arrival_rate)
print("W(Average_time_a_customer_spends_in_system):", Average_time_a_customer_spends_in_system)
Average_number_of_customers_in_queue = (mean_arrival_rate*mean_arrival_rate)/((mean_service_rate)*(mean_service_rate-mean_arrival_rate))
print("Lq(Average_number_of_customers_in_queue):", Average_number_of_customers_in_queue)
Average_time_a_customer_spends_waiting = mean_arrival_rate/ (mean_service_rate* (mean_service_rate- mean_arrival_rate))
print("Wq(Average_time_a_customer_spends_waiting):", Average_time_a_customer_spends_waiting)
Utilization_Factor = mean_arrival_rate/ mean_service_rate
print("P(Utilization_Factor):", Utilization_Factor)
Percent_Idle_Time = 1- mean_arrival_rate/mean_service_rate
print("P(Percent_Idle_Time):(No customers in system):", Percent_Idle_Time)


for num_products in range(5):  # Iterate from 0 to 4
    probability = ((mean_arrival_rate / mean_service_rate) ** num_products) * Percent_Idle_Time
    print(f"Probability of {num_products} products in the system:", probability)
Mean_product_arrival_time: 5.75
Mean_product_service_rate: 15.96
L(Average_number_of_customers_in_system): 0.563173359451518
W(Average_time_a_customer_spends_in_system): 0.0979431929480901
Lq(Average_number_of_customers_in_queue): 0.20289767022846045
Wq(Average_time_a_customer_spends_waiting): 0.03528655134408008
P(Utilization_Factor): 0.36027568922305764
P(Percent_Idle_Time):(No customers in system): 0.6397243107769424
Probability of 0 products in the system: 0.6397243107769424
Probability of 1 products in the system: 0.23047711697790843
Probability of 2 products in the system: 0.08303530216935924
Probability of 3 products in the system: 0.029915600718910754
Probability of 4 products in the system: 0.01077786366752737

Calculation:3 MANUFACURING OF LOREAL MAP PROCESS CALCULATION

In [23]:
df1 =pd.read_csv('supplychain_manufacturing.csv')
df1.head()
Out[23]:
Manufacturing Process Time (in minutes)
0 Formula Ingredients Extraction 240.0
1 Formula Ingredients Transportation 480.0
2 Product Manufacturing 260.0
3 Storage at Distribution Center 600.0
4 Storage at Retailer 800.0
In [24]:
df1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 2 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0    Manufacturing Process  11 non-null     object 
 1   Time (in minutes)       8 non-null      float64
dtypes: float64(1), object(1)
memory usage: 304.0+ bytes
In [25]:
#To check any null values in the dataset
df1.isna().sum()
Out[25]:
 Manufacturing Process    0
Time (in minutes)         3
dtype: int64
In [26]:
# Define the times for each process
times = [
    240,  # Formula Ingredients Extraction
    480,  # Formula Ingredients Transportation
    260,  # Product Manufacturing
    600,  # Storage at Distribution Center
    800,  # Storage at Retailer
    0,    # Product Use (unknown time, set as 0 for demonstration)
    0,    # Formula End Of Life (unknown time, set as 0 for demonstration)
    2000, # Packaging Recycling
    0,    # Packaging End Of Life (unknown time, set as 0 for demonstration)
    550,  # Packaging Production
    300   # Packaging Materials Extraction
]

# Calculate total time for the whole process
total_time = 0
for time in times:
    total_time += time

print("Total Time:", total_time, "minutes")
Total Time: 5230 minutes

Calculation:4 MEASURING BUSINESS PROCESSES- PART-1

In [27]:
#Calculate Multifactor Productivity Ratio
mean_revenue_generated = df['Revenue generated'].mean()
mean_manufacturing_costs = df['Manufacturing costs'].mean()
mean_costs = df['Costs'].mean()
print("Revenue generated:", mean_revenue_generated)
print("Manufacturing costs:", mean_manufacturing_costs)
print("Costs:", mean_costs)

Multifactor_Productivity_Ratio = mean_revenue_generated/(mean_manufacturing_costs+ mean_costs)
print("Multifactor_Productivity_Ratio is:", Multifactor_Productivity_Ratio)
Revenue generated: 5776.0481874
Manufacturing costs: 47.26669324143001
Costs: 529.2457821540002
Multifactor_Productivity_Ratio is: 10.01894743637283

Calculation:5 MEASURING BUSINESS PROCESSES- PART-2

In [28]:
#Calculate Efficiency Ratio
mean_actual_outputs = df['Number of products sold'].mean()
mean_standard_outputs = df['Production volumes'].mean()
print("Number of products sold:", mean_actual_outputs)
print("Production volumes:", mean_standard_outputs)

Efficiency_Ratio = mean_actual_outputs/mean_standard_outputs
print("Efficiency_Ratio is:", Efficiency_Ratio)

Efficiency_Ratio_Percentage = (mean_actual_outputs/mean_standard_outputs) * 100
print("Efficiency_Ratio Percentage is:", Efficiency_Ratio_Percentage)
Number of products sold: 460.99
Production volumes: 567.84
Efficiency_Ratio is: 0.8118307974077205
Efficiency_Ratio Percentage is: 81.18307974077204

Calculation:6 VALUE INDEX : SOLVED IN EXCEL (SEE ATTACHED)